{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Decision Trees for Classification Part 5"
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "Decision Tree is classification algorithm to build a model."
      ],
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fix_yahoo_finance is used to fetch data \n",
        "import fix_yahoo_finance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 downloaded\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Create more data\n",
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,-1)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,-1)\n",
        "dataset['Return'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()\n",
        "dataset['Up_Down'] = np.where(dataset['Return'].shift(-1) > dataset['Return'],'Up','Down')\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume  Increase_Decrease  \\\nDate                                                                          \n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200                  1   \n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300                  1   \n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100                  0   \n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700                  0   \n2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600                  0   \n\n            Buy_Sell_on_Open  Buy_Sell    Return Up_Down  \nDate                                                      \n2014-01-03                 1         1  0.012658      Up  \n2014-01-06                 1         1  0.032500    Down  \n2014-01-07                 1        -1  0.012107    Down  \n2014-01-08                -1        -1  0.000000    Down  \n2014-01-09                -1         1 -0.021531      Up  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Return</th>\n      <th>Up_Down</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.012658</td>\n      <td>Up</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.032500</td>\n      <td>Down</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-1</td>\n      <td>0.012107</td>\n      <td>Down</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n      <td>0</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>0.000000</td>\n      <td>Down</td>\n    </tr>\n    <tr>\n      <th>2014-01-09</th>\n      <td>4.20</td>\n      <td>4.23</td>\n      <td>4.05</td>\n      <td>4.09</td>\n      <td>4.09</td>\n      <td>30667600</td>\n      <td>0</td>\n      <td>-1</td>\n      <td>1</td>\n      <td>-0.021531</td>\n      <td>Up</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Create more data\n",
        "dataset['Open_N'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],'Up','Down')\n",
        "dataset['High_N'] = np.where(dataset['High'].shift(-1) > dataset['High'],'Up','Down')\n",
        "dataset['Low_N'] = np.where(dataset['Low'].shift(-1) > dataset['Low'],'Up','Down')\n",
        "dataset['Close_N'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],'Up','Down')\n",
        "dataset['Volume_N'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],'Positive','Negative')\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume  Increase_Decrease  \\\nDate                                                                          \n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200                  1   \n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300                  1   \n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100                  0   \n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700                  0   \n2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600                  0   \n\n            Buy_Sell_on_Open  Buy_Sell    Return Up_Down Open_N High_N Low_N  \\\nDate                                                                           \n2014-01-03                 1         1  0.012658      Up     Up     Up    Up   \n2014-01-06                 1         1  0.032500    Down     Up     Up    Up   \n2014-01-07                 1        -1  0.012107    Down     Up     Up    Up   \n2014-01-08                -1        -1  0.000000    Down   Down   Down  Down   \n2014-01-09                -1         1 -0.021531      Up   Down   Down    Up   \n\n           Close_N  Volume_N  \nDate                          \n2014-01-03      Up  Positive  \n2014-01-06      Up  Positive  \n2014-01-07    Down  Negative  \n2014-01-08    Down  Negative  \n2014-01-09      Up  Negative  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Return</th>\n      <th>Up_Down</th>\n      <th>Open_N</th>\n      <th>High_N</th>\n      <th>Low_N</th>\n      <th>Close_N</th>\n      <th>Volume_N</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.012658</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Positive</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.032500</td>\n      <td>Down</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Positive</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-1</td>\n      <td>0.012107</td>\n      <td>Down</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Down</td>\n      <td>Negative</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n      <td>0</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>0.000000</td>\n      <td>Down</td>\n      <td>Down</td>\n      <td>Down</td>\n      <td>Down</td>\n      <td>Down</td>\n      <td>Negative</td>\n    </tr>\n    <tr>\n      <th>2014-01-09</th>\n      <td>4.20</td>\n      <td>4.23</td>\n      <td>4.05</td>\n      <td>4.09</td>\n      <td>4.09</td>\n      <td>30667600</td>\n      <td>0</td>\n      <td>-1</td>\n      <td>1</td>\n      <td>-0.021531</td>\n      <td>Up</td>\n      <td>Down</td>\n      <td>Down</td>\n      <td>Up</td>\n      <td>Up</td>\n      <td>Negative</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.shape"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "(1171, 16)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset[['Open', 'Open_N', 'Volume_N']].values\n",
        "y = dataset['Up_Down']"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn import preprocessing\n",
        "le_Open = preprocessing.LabelEncoder()\n",
        "le_Open.fit(['Up','Down'])\n",
        "X[:,1] = le_Open.transform(X[:,1]) \n",
        "\n",
        "le_Volume = preprocessing.LabelEncoder()\n",
        "le_Volume.fit(['Positive', 'Negative'])\n",
        "X[:,2] = le_Volume.transform(X[:,2]) "
      ],
      "outputs": [],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split  \n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)"
      ],
      "outputs": [],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.tree import DecisionTreeClassifier  \n",
        "classifier = DecisionTreeClassifier()  \n",
        "classifier.fit(X_train, y_train)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n            max_features=None, max_leaf_nodes=None,\n            min_impurity_decrease=0.0, min_impurity_split=None,\n            min_samples_leaf=1, min_samples_split=2,\n            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n            splitter='best')"
          },
          "metadata": {}
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Modeling\n",
        "Up_Down_Tree = DecisionTreeClassifier(criterion=\"entropy\", max_depth = 4)\n",
        "Up_Down_Tree"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=4,\n            max_features=None, max_leaf_nodes=None,\n            min_impurity_decrease=0.0, min_impurity_split=None,\n            min_samples_leaf=1, min_samples_split=2,\n            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n            splitter='best')"
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "Up_Down_Tree.fit(X_train,y_train)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=4,\n            max_features=None, max_leaf_nodes=None,\n            min_impurity_decrease=0.0, min_impurity_split=None,\n            min_samples_leaf=1, min_samples_split=2,\n            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n            splitter='best')"
          },
          "metadata": {}
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Prediction\n",
        "predTree = Up_Down_Tree.predict(X_test)"
      ],
      "outputs": [],
      "execution_count": 12,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(predTree[0:5])\n",
        "print(y_test[0:5])"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Down' 'Up' 'Down' 'Up' 'Down']\n",
            "Date\n",
            "2018-01-23    Down\n",
            "2016-07-27      Up\n",
            "2015-10-06    Down\n",
            "2016-03-09      Up\n",
            "2014-05-06      Up\n",
            "Name: Up_Down, dtype: object\n"
          ]
        }
      ],
      "execution_count": 16,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluation\n",
        "from sklearn import metrics\n",
        "print(\"DecisionTrees's Accuracy: \", metrics.accuracy_score(y_test, predTree))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "DecisionTrees's Accuracy:  0.6\n"
          ]
        }
      ],
      "execution_count": 17,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Accuracy Score without Sklearn\n",
        "boolian = (y_test==predTree)\n",
        "accuracy = sum(boolian)/y_test.size\n",
        "accuracy"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 19,
          "data": {
            "text/plain": "0.6"
          },
          "metadata": {}
        }
      ],
      "execution_count": 19,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "mimetype": "text/x-python",
      "pygments_lexer": "ipython3",
      "name": "python",
      "file_extension": ".py",
      "nbconvert_exporter": "python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "version": "3.5.5"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}